import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import warnings
# Suppress ALL warnings to keep notebook output clean.
# NOTE(review): blanket suppression also hides real deprecation/runtime
# warnings (e.g. from sklearn/pandas) — consider narrowing by category.
warnings.filterwarnings(action='ignore')
# Load the extracted feature table, shuffle it, and split 80/20 into
# train/test. The first 7 columns are a one-hot label block; the rest
# are the numeric features.
feature = pd.read_csv('Feature_Dataset.csv')
shuffled = sklearn.utils.shuffle(feature)

from sklearn.model_selection import train_test_split
# train : test = 0.8 : 0.2
train, test = train_test_split(shuffled, test_size=0.2)

y_train = train.iloc[:, :7]
x_train = train.iloc[:, 7:]
y_test = test.iloc[:, :7]
x_test = test.iloc[:, 7:]

# Z-score standardization (the original comment said "min-MAX norm." but
# (x - mean) / std is standardization, not min-max scaling).
# Bug fix: scale the TEST set with the TRAINING set's statistics —
# scaling each split with its own mean/std leaks test information and
# makes the two splits inconsistent with each other.
train_mean = x_train.mean()
train_std = x_train.std()
x_train = (x_train - train_mean) / train_std
x_test = (x_test - train_mean) / train_std

# Collapse the one-hot label block into integer class ids for sklearn.
yy_train = np.argmax(y_train.to_numpy(), axis=1)
yy_test = np.argmax(y_test.to_numpy(), axis=1)
# RandomForest test: fit a depth-limited forest and report test accuracy.
# (Removed two leftover notebook OUTPUT lines that had been pasted into
# the source — a bare `RandomForestClassifier(max_depth=5)` repr echo and
# the non-Python line `Acc: 0.75`, which is a syntax error in a .py file.)
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rf = RandomForestClassifier(max_depth=5)
rf.fit(x_train, yy_train)

prd = rf.predict(x_test)
accuracy = accuracy_score(yy_test, prd)
print('Acc: ', accuracy)
# Confusion matrix of the random-forest predictions, labeled with the
# 7 drum classes and rendered as a heatmap.
# (Removed the leftover notebook OUTPUT line `<AxesSubplot:>`, which is a
# syntax error in a .py file.)
from sklearn.metrics import confusion_matrix
import seaborn as sns

classes = ['Conga&Bongo', 'Crash&OpenHat', 'Hi-Hat', 'Kick', 'Shaker', 'Snare', 'Tom']
cm = pd.DataFrame(confusion_matrix(yy_test, prd), columns=classes, index=classes)
sns.heatmap(cm, annot=True)
# Build the ordered feature-name list (mirrors the 9th cell of
# extract.ipynb): one 'duration' entry, then 14 indexed entries for each
# of the four per-channel descriptors.
feature_name = ['duration']
for descriptor in ['ZCR', 'Flt', 'Bdw', 'Roll']:
    feature_name.extend(f'{descriptor}[{idx}]' for idx in range(14))
# Extract a single tree from the fitted forest and render it as a PNG.
estimator = rf.estimators_[5]

from sklearn.tree import export_graphviz

# Export as dot file
export_graphviz(estimator, out_file='tree.dot',
                feature_names=feature_name,
                class_names=classes,
                rounded=True, proportion=False,
                precision=2, filled=True)

# Convert to png using the Graphviz 'dot' binary (must be on PATH).
# Bug fix: the original used subprocess.call and ignored the return code,
# so a missing/failing 'dot' silently produced no tree.png; check=True
# raises CalledProcessError instead.
from subprocess import run
run(['dot', '-Tpng', 'tree.dot', '-o', 'tree.png', '-Gdpi=600'], check=True)

# Display in jupyter notebook
from IPython.display import Image
Image(filename='tree.png')
# Fit a battery of off-the-shelf classifiers on the same split and log
# test accuracy and log loss for each, for visual comparison below.
# (Removed the giant pasted notebook OUTPUT line of results, which was a
# syntax error in a .py file.)
from sklearn.metrics import accuracy_score, log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="rbf", C=0.025, probability=True),
    NuSVC(probability=True),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB(),
    LinearDiscriminantAnalysis(),
    QuadraticDiscriminantAnalysis(),
    MLPClassifier(alpha=1, max_iter=1000)
]

# Logging for Visual Comparison
log_cols = ["Classifier", "Accuracy", "Log Loss"]
# Bug fix: DataFrame.append was deprecated in pandas 1.4 and removed in
# pandas 2.0; collect plain rows and build the frame once at the end.
log_rows = []
for clf in classifiers:
    clf.fit(x_train, yy_train)
    name = clf.__class__.__name__

    print("=" * 30)
    print(name)
    print('****Results****')

    # Renamed from the misleading 'train_predictions' — these are
    # predictions on the TEST split.
    test_predictions = clf.predict(x_test)
    acc = accuracy_score(yy_test, test_predictions)
    print("Accuracy: {:.4%}".format(acc))

    test_probabilities = clf.predict_proba(x_test)
    ll = log_loss(yy_test, test_probabilities)
    print("Log Loss: {}".format(ll))

    log_rows.append([name, acc * 100, ll])

print("=" * 30)
log = pd.DataFrame(log_rows, columns=log_cols)
# Render the two comparison bar charts (accuracy, then log loss) from the
# `log` frame built above — one data-driven loop instead of two copies.
plot_specs = [
    ('Accuracy', 'b', 'Accuracy %', 'Classifier Accuracy'),
    ('Log Loss', 'g', 'Log Loss', 'Classifier Log Loss'),
]
for metric, bar_color, x_label, chart_title in plot_specs:
    sns.set_color_codes("muted")
    sns.barplot(x=metric, y='Classifier', data=log, color=bar_color)
    plt.xlabel(x_label)
    plt.title(chart_title)
    plt.show()